********************************************************************************
*** REGRESS RECALL INDICATOR ON INDUSTRY, REGION, CALENDAR MONTH
********************************************************************************
* Input/output directories: filepath holds the base .dta files,
* filepath3 holds the RP working files read and written below.
local austrian_root "e:/NYU/projects/duration dependence/stata/austrian data"
local filepath "`austrian_root'/dta files"
local filepath3 "`austrian_root'/RP for IG"

********************************************************************************
* ADD FID IDENTIFIERS (PREVIOUS AND NEXT EMPLOYER) TO THE NON-EMPLOYMENT SPELLS
********************************************************************************
	
	* Build the file of lfstatus==1 / ins_type "US" spells with fully
	* overlapped spells removed; it is used further below to look up the
	* employer (fid) before and after each spell.
	use "`filepath'/epi_all.dta", clear
	
	* ADD BIRTHYEAR AND GENDER (persons found only in the using file are dropped)
	merge m:1 pid using "`filepath'/pid_sex_birthyear.dta"
	drop if _merge==2
	drop _merge
	
	* KEEP ONLY EMPLOYMENT (lfstatus 1) RECORDS OF INSURANCE TYPE "US"
	keep if lfstatus ==1 & ins_type=="US"	
	
	* DROP FULLY-OVERLAPPING SPELLS
	* A spell is dropped when the spell directly before it (same pid and
	* lfstatus, ordered by start date) starts no later and ends no earlier.
	* Among spells with the same start date the longest one is placed first,
	* so the outcome no longer depends on how Stata happens to order ties.
	* A missing edate compares as larger than any date in Stata, so such
	* spells are ordered as the longest ones.
	gen double neg_edate = cond(missing(edate), c(mindouble), -edate)
	sort pid lfstatus bdate neg_edate, stable
	
	* Each pass only looks at the immediately preceding row, so dropping a
	* row can expose a new overlap; repeat until a pass drops nothing.
	local ndropped = 1
	while `ndropped' > 0 {
	   by pid lfstatus: gen byte complete_overlap = _n>1 & bdate[_n-1]<=bdate & edate[_n-1]>=edate
	   quietly count if complete_overlap==1
	   local ndropped = r(N)
	   drop if complete_overlap==1
	   drop complete_overlap
	}
	drop neg_edate
	save  "`filepath'/RP_epi_UStype_drop_full_overlap.dta", replace
*/	

	
	* Number the spells of every person and save the indexed file.
	use "`filepath3'/RP_workers_longest_segment.dta", clear	
	
	* spell_index: chronological position of the spell within the person.
	* The ordering variables in parentheses make the numbering reproducible;
	* a plain sort followed by _n leaves the order within groups arbitrary.
	bysort pid (bdate_spell edate_spell): gen spell_index = _n
	
	* spell_status_index: chronological position among the person's spells
	* with the same lfstatus; only defined for lfstatus == 2, missing otherwise.
	bysort pid lfstatus (bdate_spell edate_spell): gen spell_status_index = _n if lfstatus==2
	save "`filepath3'/RP_workers_longest_segment_index.dta", replace
*/
	
	

	* Determine the largest spell_status_index; rmax is reused by the loops below.
	local indexed "`filepath3'/RP_workers_longest_segment_index"
	use "`indexed'.dta", clear
	set more off
	summarize spell_status_index
	local rmax = r(max)
		
	* Write one file per index value, holding every person's spell with that
	* spell_status_index (at most one row per pid).
	forvalues idx = 1/`rmax' {
		use "`indexed'.dta", clear
		keep if spell_status_index == `idx'
		keep pid spell_index spell_status_index bdate_spell edate_spell
		save "`indexed'_`idx'.dta", replace
	}
*/	


	* NEXT EMPLOYER: for the spells with spell_status_index k, find the
	* employment record that begins on the day after the spell ends.
	set more off
	forvalues k = 1/`rmax' {
		use  "`filepath'/RP_epi_UStype_drop_full_overlap.dta", clear
		merge m:1 pid using "`filepath3'/RP_workers_longest_segment_index_`k'.dta"
		keep if _merge==3
		
		* the job has to start exactly one day after edate_spell
		gen bdate_emp_beg  = edate_spell+1
		keep if bdate_emp_beg == bdate
		
		* several matching jobs: keep the one sorted last by duration (the longest)
		gen dur = edate-bdate
		bysort pid spell_index (dur): keep if _n == _N
		
		keep pid fid spell_index
		rename fid fid_next
		save "`filepath3'/RP_workers_longest_segment_index_`k'_fidnext.dta", replace
	}	
*/	
	
	* PREVIOUS EMPLOYER: for the spells with spell_status_index k, find the
	* employment record that ends on the day before the spell begins.
	set more off
	forvalues k = 1/`rmax' {
		use  "`filepath'/RP_epi_UStype_drop_full_overlap.dta", clear
		merge m:1 pid using "`filepath3'/RP_workers_longest_segment_index_`k'.dta"
		keep if _merge==3
		
		* the job has to end exactly one day before bdate_spell
		gen edate_emp  = bdate_spell-1
		keep if edate_emp == edate
		
		* several matching jobs: keep the one sorted last by duration (the longest)
		gen dur = edate-bdate
		bysort pid spell_index (dur): keep if _n == _N
		
		keep pid fid spell_index
		rename fid fid_previous
		save "`filepath3'/RP_workers_longest_segment_index_`k'_fidprevious.dta", replace
	}	
*/	
	

	* Attach fid_previous and fid_next to the indexed spell file by merging
	* the per-k result files back one at a time.
	use "`filepath3'/RP_workers_longest_segment_index.dta", clear
	
	* The number of per-k files equals the largest spell_status_index in the
	* data. Derive it here instead of hard-coding it, so this step stays in
	* line with the loops that wrote the files.
	quietly summarize spell_status_index
	local rmax = r(max)
	
	* accumulators: each per-k file only fills the rows of its own spells
	gen double fid_previous_temp = .
	gen double fid_next_temp = .
	
	set more off
	forval k = 1/`rmax'{		
		merge 1:1 pid spell_index using "`filepath3'/RP_workers_longest_segment_index_`k'_fidprevious.dta"
		drop _merge
		merge 1:1 pid spell_index using "`filepath3'/RP_workers_longest_segment_index_`k'_fidnext.dta"
		drop _merge	

		* copy the values just merged in, then drop them so the next
		* iteration can merge variables with the same names again
		replace fid_previous_temp = fid_previous if fid_previous~=.
		replace fid_next_temp = fid_next if fid_next~=.
		
		drop fid_next fid_previous
	}	
	rename fid_previous_temp fid_previous
	rename fid_next_temp fid_next	
	save "`filepath3'/RP_workers_longest_segment_fidpreviousnext.dta", replace
*/

********************************************************************************
* ADD INDUSTRY, REGION, EDUCATION, AGE, GENDER AND RECALL; PREDICT RECALL
********************************************************************************

	* Build spell-level covariates and predicted recall probabilities.
	use "`filepath3'/RP_workers_longest_segment_fidpreviousnext.dta", clear
	
	* restrict the sample to unemployment spells (lfstatus 2)
	keep if lfstatus == 2
	
	* industry and region are looked up through the previous employer's fid
	gen double fid = fid_previous
	merge m:1 fid using "`filepath'/fid_industry_region.dta"
	drop if _merge ==2
	drop _merge
	
	* education, matched on the calendar year in which the spell starts;
	* missing education values are recoded to 0
	gen year =year(bdate_spell)
	merge m:1 pid year using "`filepath'/educpid.dta"	
	drop if _merge==2
	drop _merge
	drop year	
	foreach v of varlist educ_imp educ_max {
		replace `v' = 0 if `v'==.
	}
	
	* age in the year the spell starts
	gen age = year(bdate_spell) - birthyear		
	
	* recall: 1 when the next employer equals the (non-missing) previous
	* employer, 0 otherwise, missing when the next employer is unknown
	gen byte recall = (fid_next == fid_previous & fid_previous~=.) if fid_next ~= .

	* gender dummy
	gen byte male = (sex==1)
	drop sex
	

	* OTHER VARIABLES
	   gen byte NACE10 = floor(industry/1000)
	   gen byte region = floor(fid_district/100)
	   gen byte month = month(bdate_spell)
   
	* RUN REGRESSIONS TO PREDICT RECALLS
	* same specification estimated by OLS, logit and probit
		local rhs "male i.NACE10 i.region i.month"
		regress recall `rhs' if recall ~=.
		predict precall_OLS, xb
		logit recall `rhs' if recall ~=.
		predict precall_LOG
		probit recall `rhs' if recall ~=.
		predict precall_PRO
		
	* round the predictions to four decimals
		foreach v of varlist precall_OLS precall_LOG precall_PRO {
			replace `v' = round(`v'*10000)/10000
		}

	save "`filepath3'/RP_workers_longest_segment_nonemp_observables.dta", replace
*/

********************************************************************************	
* MERGE WITH SELECTED SAMPLE
********************************************************************************

	* Attach the observables to the selected sample, reshape to one row per
	* person and export both a .dta and a .csv file.
	use "`filepath3'/RP_IG_raw_0_105_RP_allinfo.dta", clear
	keep pid bdate_spell edate_spell duration spell_number A1 B1 B3a B4
	* set the sample's own spell_number aside under a different name
	rename spell_number spell_number_orig
	merge 1:1 pid bdate_spell edate_spell using "`filepath3'/RP_workers_longest_segment_nonemp_observables.dta"
	keep if _merge ==3
	drop _merge	
	
	keep pid spell_number duration A1 B1 B3a B4 industry - precall_PRO
	order pid spell_number
	
	* where educ_max is 0, use the person's highest educ_max across spells
	bysort pid: egen educ_max_max = max(educ_max)
	replace educ_max = educ_max_max if educ_max==0
	drop educ_max_max
	
	drop educ educ_cat educ_imp industry fid_district fid_nuts month
	rename educ_max educ
	
	* round the predicted recall probabilities to five decimals
	foreach v of varlist precall_OLS precall_LOG precall_PRO {
		replace `v' = round(`v'*10^5)/10^5
	}
	
	* spell-level variables become one column per spell_number
	local stubs "duration age recall NACE10 region precall_OLS precall_LOG precall_PRO"
	local wide_vars "duration* age* recall* NACE10* region* precall_OLS* precall_LOG* precall_PRO*"
	reshape wide `stubs', i(pid) j(spell_number)	
	
	* consecutive person identifier used in the exported file
	gen pid_new = _n
	order pid_new `wide_vars' pid
	save "`filepath3'/IG_raw_0_105_RP_with_observables.dta", replace	
	export delimited pid_new `wide_vars' A1 B1 B3a B4 educ male using "`filepath3'/IG_raw_0_105_RP_with_observables.csv", replace	

	
	
